选择和索引数据
1> .loc[]
标签索引:使用行/列的标签选取数据,通过方括号 [] 访问;按标签切片时起止标签都包含在结果中
# Label-based selection with .loc.
# Build a 5x4 frame of random values with row labels a-e and column labels A-D.
# NOTE: the data is random and unseeded, so the sample outputs shown below
# come from one particular run and will differ on every execution.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=list('abcde'),
    columns=list('ABCD'),
)
print(df)
# Output:
#           A         B         C         D
# a  0.774623 -0.342861  0.125304  0.902453
# b  0.817201  0.228357 -0.722603  0.484725
# c -0.471602 -1.626748  1.249594  1.059656
# d -1.535207 -1.344895 -0.983298 -0.402143
# e -1.024832 -1.205479 -1.275566 -0.925932

# All rows, a single column label: the result is a Series.
print(df.loc[:, 'B'])
# Output:
# a   -0.342861
# b    0.228357
# c   -1.626748
# d   -1.344895
# e   -1.205479
# Name: B, dtype: float64

# All rows, a list of column labels: the result is a DataFrame.
print(df.loc[:, ['A', 'C']])
# Output:
#           A         C
# a  0.774623  0.125304
# b  0.817201 -0.722603
# c -0.471602  1.249594
# d -1.535207 -0.983298
# e -1.024832 -1.275566

# Explicit lists of row labels and column labels.
print(df.loc[['a', 'b', 'e'], ['A', 'C']])
# Output:
#           A         C
# a  0.774623  0.125304
# b  0.817201 -0.722603
# e -1.024832 -1.275566

# Label slice over rows: both endpoints ('a' and 'd') are included.
print(df.loc['a':'d'])
# Output:
#           A         B         C         D
# a  0.774623 -0.342861  0.125304  0.902453
# b  0.817201  0.228357 -0.722603  0.484725
# c -0.471602 -1.626748  1.249594  1.059656
# d -1.535207 -1.344895 -0.983298 -0.402143

# Select row 'a' and compare element-wise: yields a boolean Series.
print(df.loc['a'] > 0)
# Output:
# A     True
# B    False
# C     True
# D     True
# Name: a, dtype: bool
2> .iloc[]
整数位置索引:使用从 0 开始的整数位置选取数据,通过方括号 [] 访问;按位置切片时不包含结束位置
# Integer-position selection with .iloc.
# Build a 5x4 frame of random values; no index is given, so rows get the
# default integer labels 0-4.
# NOTE: the data is random and unseeded, so the sample outputs shown below
# come from one particular run and will differ on every execution.
df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
print(df)
# Output:
#           A         B         C         D
# 0 -1.411028 -0.645123  0.553776  0.696463
# 1  1.377117  0.974366 -1.244348 -0.027383
# 2 -0.846788 -0.632638  1.146354 -0.885172
# 3 -1.389149 -0.947130 -1.988737 -2.035326
# 4 -0.116252  1.075516 -1.195252 -1.519359

# First four rows (positions 0-3); the slice end is excluded.
print(df.iloc[:4])
# Output:
#           A         B         C         D
# 0 -1.411028 -0.645123  0.553776  0.696463
# 1  1.377117  0.974366 -1.244348 -0.027383
# 2 -0.846788 -0.632638  1.146354 -0.885172
# 3 -1.389149 -0.947130 -1.988737 -2.035326

# Row positions 1-4 and column positions 2-3.
print(df.iloc[1:5, 2:4])
# Output:
#           C         D
# 1 -1.244348 -0.027383
# 2  1.146354 -0.885172
# 3 -1.988737 -2.035326
# 4 -1.195252 -1.519359

# Explicit lists of row positions and column positions.
print(df.iloc[[1, 3, 4], [1, 3]])
# Output:
#           B         D
# 1  0.974366 -0.027383
# 3 -0.947130 -2.035326
# 4  1.075516 -1.519359

# Row positions 1-2, every column.
print(df.iloc[1:3, :])
# Output:
#           A         B         C         D
# 1  1.377117  0.974366 -1.244348 -0.027383
# 2 -0.846788 -0.632638  1.146354 -0.885172

# Every row, column positions 1-2.
print(df.iloc[:, 1:3])
# Output:
#           B         C
# 0 -0.645123  0.553776
# 1  0.974366 -1.244348
# 2 -0.632638  1.146354
# 3 -0.947130 -1.988737
# 4  1.075516 -1.195252
3> 属性访问
直接使用行列索引(先列后行)
使用属性运算符"."选择列
# Attribute access and direct [] indexing.
# NOTE: the data is random and unseeded, so the sample outputs shown below
# come from one particular run and will differ on every execution.
df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'))

# The "." attribute operator selects column 'A' as a Series.
print(df.A)
# Output:
# 0    1.122023
# 1    0.081673
# 2    0.051266
# 3   -0.056075
# 4    0.682470
# Name: A, dtype: float64

# Direct indexing, column first and then row: column 'A', row label 3.
print(df['A'][3])  # Output: -0.056075